#########################################################################
# Replication File: "The Politics of Too Much - The Decline in Labor Power 
# and the Global Rise of Corporate Saving" 
# Author: Nils Redeker
# Appendix C Firm-Level Savings & Trade Union Density in Germany
# R version 3.6.2 (2019-12-12)
#########################################################################

# clean working environment
rm(list = ls())

library(haven)
library(dplyr)
library(sjlabelled)
library(readr)
library(ggplot2)
library(lfe)
library(stargazer)
library(tidyr)
library(eurostat)
library(sjPlot)


# load data --- --- ---- --- --- --- --- --- --- --- --- --- -- --- --- --- --- --- --- -- --- --- --- --- ----- 

# SOEP data & weights are available from https://www.diw.de/soep. For details README file

soep <- read_csv("~/DE_household_data_SOEP.csv")  %>% remove_all_labels() %>% 
  dplyr::select(pid, hid, syear, p_nace, p_nace2, plh0263_v2, plb0022_h) %>% 
  filter(plh0263_v2!=-8) # drop waves without trade union item

soep_weight<-read_dta("~/DE_household_weights_SOEP.csv") %>% 
  remove_all_labels()

# Country level data on union membership from Armingeon et al. (2020)
cpds<-read_dta("~/Downloads/CPDS-1960-2017-Update-2019.dta") %>% 
  filter(country=="Germany", year>1997) %>% dplyr::select(year, ud)

# Eurostat National accounts aggregates for industry level controls 
industry<-get_eurostat("nama_10_a64")
employment<-get_eurostat("nama_10_a64_e")

# Firm level data from COMPUSTAT 
firms.de<- read_csv("~/Downloads/f2567715edd65a79.csv") 

# NACE-SIC correspondance table
nace_SIC <-read_csv("~/Dropbox/02_PhD/07_Corporate_Savings/02Code/02Analysis/R&R/nace_SIC_clean_complete.csv")

#------------------------------------------------------------------------------------------------------#
# I) PREPARE SOEP BASED ESTIMATES FOR TRADE UNION DENSITY
#------------------------------------------------------------------------------------------------------#

# 1) PREPARE SOEP DATA --------------------------------------------------------------------------------#

soep<-left_join(soep, soep_weight, by="pid") %>% # merge household data with weights
  filter(plh0263_v2!=-5) %>% # drop non-responses 
  mutate(union=dplyr::recode(plh0263_v2, "2"=0, "-1"=0)) # recode dummy for union membership

# create weight panel for each year
soep<-soep %>% mutate(weight=  ifelse(syear==1998, ophrf,
         ifelse(syear==2000, qphrf,
         ifelse(syear==2001, rphrf, ifelse(syear==2002, sphrf,
         ifelse(syear==2003, tphrf, ifelse(syear==2004, uphrf,
         ifelse(syear==2005, vphrf, ifelse(syear==2006, wphrf,
         ifelse(syear==2007, xphrf, ifelse(syear==2007, yphrf,
         ifelse(syear==2008, zphrf, ifelse(syear==2009, baphrf, 
         ifelse(syear==2010, bbphrf,ifelse(syear==2011, bcphrf, 
         ifelse(syear==2012, bdphrf,ifelse(syear==2013, bephrf, 
         ifelse(syear==2014, bfphrf,ifelse(syear==2015, bgphrf, 
         ifelse(syear==2016, bhphrf, ifelse(syear==2017, bhphrf, 
         is.na(.))))))))))))))))))))))


# 2) CREATE SOEP BASED ESTIMATES OF UNION DENSITY ----------------------------------------------------#

# 2.1 Create SOEP based estimates of trade union density by year

tu_soep_an<-soep %>% group_by(syear) %>% filter(plb0022_h==1 ) %>% # drop non-working populaton
  dplyr::summarise(ud_soep = weighted.mean(union, weight)*100, n=n()) %>% # estimate weighted mean by year
  mutate(syear=as.integer(syear))

# 2.2 Create SOEP based estimates of trade union density by year and sector

soep <- soep %>% mutate(n_nace=nchar(p_nace)) %>% filter(p_nace>=0) %>% # drop non-responses
  mutate(p_nace=ifelse(n_nace==1, paste0("0", p_nace), p_nace)) %>% # prepare for single digit
  mutate(nace=substr(p_nace,1,1))

tu_soep_sector<- soep %>% group_by(syear, nace) %>% # create tu density by year/sector
  dplyr::summarize(ud=mean(union)*100, n=n(), 
                   ud_weight = weighted.mean(union, weight)*100, # weighted average union density
                   na.rm=T) %>% mutate(fyear=syear, nace=as.character(nace)) # prepare merging variables


# 3) PREPARE SECTOR LEVEL CONTROLS  -------------------------------------------------------------------#

# 3.1 Create one digit sector categories 
industry_short <- industry %>% 
  mutate(nace_r2=as.character(nace_r2), nchar=nchar(nace_r2)) %>% 
  filter(geo=="DE", nchar==3, grepl("[0-9]", nace_r2)) %>% 
  mutate(nace_num=readr::parse_number(nace_r2),
         nchar2=nchar(nace_num),
         nace_num=ifelse(nchar2==1, paste0("0", nace_num), nace_num),
         nace_one=substr(nace_num,1,1)) %>% 
  filter(unit=="CP_MEUR") %>% 
  spread(na_item, values)

# 3.2 Create aggregate sector level controls 
industry_nace_one<-industry_short %>% group_by(time, nace_one) %>% 
  dplyr::summarise(gva=sum(B1G),output=sum(P1), comp=sum(D1)) %>% 
  ungroup() %>% group_by(nace_one) %>% 
  mutate(gva_growth=((gva-lag(gva))/lag(gva))*100, # gross value added growthh
         output_growth=((output-lag(output))/lag(output))*100, # output growth
         comp_growth=((comp-lag(comp))/lag(comp))*100, # compensation growth
         fyear=as.numeric(substr(time,1,4))) # year variable 


# 3.3 Merge SOEP based union density estimates with sector-level controls
tu_soep_sector<-left_join(tu_soep_sector, industry_nace_one, by=c("fyear", "nace"="nace_one")) %>%
  mutate(nace=as.numeric((nace)), fyear=as.integer(fyear))

# 3.4 Drop sectors with too little observations for estimation of union density
tu_soep_sector<- tu_soep_sector %>% mutate(ud_weight = replace(ud_weight, which(n<500), NA))
tu_soep_sector<- tu_soep_sector %>% group_by(nace) %>% mutate(mean=mean(ud_weight, na.rm=T)) %>% filter(mean!="NaN")


# 4) MERGE SECTOR AND FIRM LEVEL DATA  ---------------------------------------------------------#

# Merge wiht correspondence table and create one digit sector variables
firms.de <- firms.de %>% mutate(sic=as.character(sic)) 

firms.de<-  left_join(firms.de, nace_SIC, by=c("sic"))%>% 
  mutate(nace=ifelse(is.na(nace), sic, nace), 
         nace_one=as.numeric(substr(nace,1,1)),
         sic_one=as.numeric(substr(sic,1,1))) %>% 
  filter(fyear>1998, sic_one!=6) %>%
  filter(sic_one!=6, !grepl(' SE', conm)) # drop SEs


# Merge firm and sector level data
firm_sector_ud<-left_join(firms.de, tu_soep_sector, 
                          by=c("fyear", "nace_one"="nace"), keep=T) %>% 
  mutate(per=ifelse(emp>=2,1,0), # generate treatment dummy
         cash=ch + ivst, # generate cash variable 
         cash_ratio = cash/at) # cash per total assets)



#------------------------------------------------------------------------------------------------------#
# 5) FIGURE C.11: COMPARISON OF GERMAN TRADE UNION DENSITY IN CPDS WITH SOEP-BASED ESTIMATES
#------------------------------------------------------------------------------------------------------#

plot_compare<-left_join(tu_soep_an, cpds, by=c("syear"="year")) %>% 
  dplyr::select(syear, ud, ud_soep) %>% 
  gather("source", "ud", ud:ud_soep)

ggplot(plot_compare, aes(x=syear, y=ud, linetype=source)) + geom_line() + theme_minimal() +
  ylab("Trade Union Density") + xlab("") + scale_linetype_discrete(labels=c("CPDS", "SOEP"))  +
  theme(legend.position = "bottom", legend.title = element_blank(), 
        text = element_text(size=12, family="Frutiger-Light")) 


#------------------------------------------------------------------------------------------------------#
# 5) TABLE C.5: FIRM-LEVEL SAVINGS AND SECTOR-LEVEL UNION DENSITY
#------------------------------------------------------------------------------------------------------#

model1<-felm(cash_ratio~ ud_weight  |  conm  + fyear.y + nace_one| 0 | nace_one, 
             data=firm_sector_ud %>% filter( n>500))

model2<-felm(cash_ratio~ ud_weight + gva + comp |  conm  + fyear.y + sic_one| 0 | nace_one, 
             data=firm_sector_ud %>% filter( n>499))

model3<-felm(cash_ratio~ ud_weight + gva + comp  + sale + txc |  conm  + fyear.y + sic_one| 0 | nace_one, 
                  data=firm_sector_ud %>% filter(n>499))

stargazer(model1,model2,model3, type="text",
          intercept.top = TRUE, intercept.bottom = FALSE, omit = c("sic", "Constant", "gva", 
                                                                   "comp", "sale", "ppent", "at", "txc"),
          omit.stat = c("f", "ser", "adj.rsq", "rsq"), 
          covariate.labels = c("Sectoral Trade Union Density"), 
          add.lines = list(c("Sector-Level Controls", "No", "Yes", "Yes", "Yes"),
                           c("Firm-Level Controls", "No", "No", "Yes", "Yes"),
                           c("Firm FE", "Yes", "Yes", "Yes", "Yes"),
                           c("Year FE", "Yes", "Yes", "Yes", "Yes"),
                           c("Industry FE", "Yes", "Yes", "Yes", "Yes"),
                           c("Clustured SE", "Yes", "Yes", "Yes", "Yes")),
          dep.var.labels   = "Firm-level Savings Ratio",
          title = "Firm-Level Savings and Sector-Level Union Density",
          notes.append = TRUE)

